/*==============================================================================
FILE NAME: Create_county_complaints.do
INPUTS: incidents.dta
OUTPUTS: county complaints.dta, county air complaints.dta
CREATED: 24 November 2021
UPDATED: 26 September 2023
==============================================================================*/

//create county-by-month panel of complaints

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data"  // Define global paths for replication package
} 
*/

use "$processed_data/incidents.dta", clear

//explore zip code variables: ZIP and ZIPLOCATION

destring ZIP, replace
destring ZIPLOCATION, replace
gen zip_nottx=0
replace zip_nottx = 1 if ZIP < 73301 
replace zip_nottx = 1 if ZIP > 88589
replace zip_nottx=. if ZIP==.
tab zip_nottx, missing

gen ziploc_nottx=0
replace ziploc_nottx = 1 if ZIPLOCATION < 73301 
replace ziploc_nottx = 1 if ZIPLOCATION > 88589
replace ziploc_nottx=. if ZIPLOCATION==.
tab ziploc_nottx, missing

gen same_zip=0
replace same_zip=1 if ZIP==ZIPLOCATION
replace same_zip=. if ZIP==.
replace same_zip=. if ZIPLOCATION==.
tab same_zip, missing

//no clear winner on zip code variables...we may want to reach out to TCEQ for clarification. Ideally we'd know which zip code variable corresponds to the RN and which to the CN

//explore county variables: RegulatedEntityCounty and CNTY
//no missing values for either

gen same_county=0
replace same_county=1 if RegulatedEntityCounty==CNTY
tab same_county, missing

//all values of 2 county variables are identical
//for general deterrence variables, focus on county

//create monthly count of complaints by county
clear
use "$processed_data/incidents.dta"
keep ComplaintIncident CNTY IncidentRecDate IncidentStatus
duplicates drop
isid ComplaintIncident
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate 
rename temp IncidentRecDate
format IncidentRecDate %td
replace IncidentRecDate = . if IncidentRecDate < 0
gen year=year(IncidentRecDate)
gen month=month(IncidentRecDate)
gen mdate=ym(year,month)
format mdate %tm
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
tab IncidentStatus
drop if IncidentStatus=="REFERRED"
//15% of complaints are referred outside TCEQ; drop these
drop year month IncidentRecDate ComplaintIncident IncidentStatus
gen complaint=1
rename CNTY county
sort county mdate
collapse (sum) complaint, by(county mdate)
rename complaint county_complaint
label var county_complaint "# complaints in county-month"
sort county mdate
save "$processed_data/county complaints.dta", replace

//create monthly count of air complaints by county
use "$processed_data/incidents.dta", clear
keep if Media=="AIR"
keep ComplaintIncident CNTY IncidentRecDate IncidentStatus
duplicates drop
isid ComplaintIncident
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate 
rename temp IncidentRecDate
format IncidentRecDate %td
replace IncidentRecDate = . if IncidentRecDate < 0
gen year=year(IncidentRecDate)
gen month=month(IncidentRecDate)
gen mdate=ym(year,month)
format mdate %tm
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
tab IncidentStatus
drop if IncidentStatus=="REFERRED"
//almost 6% of air complaints are referred outside TCEQ; drop these
drop year month IncidentRecDate ComplaintIncident IncidentStatus
gen air_complaint=1
rename CNTY county
sort county mdate
collapse (sum) air_complaint, by(county mdate)
rename air_complaint county_air_complaint
label var county_air_complaint "# air complaints in county-month"
sort county mdate
save "$processed_data/county air complaints.dta", replace